{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import loadtxt\n",
    "from xgboost import XGBClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**数据输入，分出变量X和标签Y**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Parse the comma-separated file into a 2-D NumPy array.\n",
    "dataset = loadtxt('dataset_001.csv', delimiter=\",\")\n",
    "\n",
    "# Columns 0-7 are used as the feature matrix, column 8 as the label vector.\n",
    "X, Y = dataset[:, :8], dataset[:, 8]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**将数据集分为训练集和测试集，测试集用来预测，训练集用来学习**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fixed seed so the split is the same on every run.\n",
    "seed = 7\n",
    "# Fraction of the samples held out as the test split.\n",
    "test_size = 0.33\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, Y, test_size=test_size, random_state=seed\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "直接使用xgboost封装好的分类器和回归器，可以直接使用XGBClassifier建立模型\n",
    "\n",
    "http://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n",
       "       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n",
       "       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,\n",
       "       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
       "       silent=True, subsample=1)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit with early stopping: log loss on eval_set is tracked while boosting, and\n",
    "# early_stopping_rounds=10 ends training once it stops improving.\n",
    "# (For a plain fit without monitoring, call model.fit(X_train, y_train) instead.)\n",
    "# NOTE(review): eval_set is the test split, so the accuracy reported later is\n",
    "# measured on data that also picked the stopping point -- consider a separate\n",
    "# validation split.\n",
    "eval_set = [(X_test, y_test)]\n",
    "model = XGBClassifier()\n",
    "model.fit(\n",
    "    X_train,\n",
    "    y_train,\n",
    "    eval_set=eval_set,\n",
    "    eval_metric=\"logloss\",\n",
    "    early_stopping_rounds=10,\n",
    "    verbose=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "XGBClassifier 的 predict 直接返回每个样本的类别标签（0 或 1）；如果需要样本属于第一类的概率，请使用 predict_proba，再用 round（或其他阈值）将其转换为 0 1 值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# XGBClassifier.predict already returns a class label for every sample, so no\n",
    "# rounding step is needed; `predictions` is kept for the scoring cell below.\n",
    "y_pred = model.predict(X_test)\n",
    "predictions = y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 77.56%\n"
     ]
    }
   ],
   "source": [
    "# Share of test samples whose predicted label equals the true label.\n",
    "accuracy = accuracy_score(y_true=y_test, y_pred=predictions)\n",
    "print(\"Accuracy: %.2f%%\" % (100.0 * accuracy))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对每个特征的重要性进行分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8VOW1//HPgoBcorEY5BLAlKJFIBAgFnrqkaSCAsWq\n6M+jxQugBy+toAUR1KK2tVItAuWoCFZF9GirgjfQ2iJRqyIFGy6toG2JgCAX5RbAI4H1+2M2OFwz\nmOzMTPb3/XrNi72ffVsrJGv2PHvPfszdERGRaKmV7ABERKT6qfiLiESQir+ISASp+IuIRJCKv4hI\nBKn4i4hEkIq/yAHMbLKZ/SzZcYiEyXSfv1QVMysFmgC745pPcfc1ldhnIfCEu7eoXHTpycweA1a7\n+23JjkVqFp35S1U7x90z415fu/BXBTPLSObxK8PMaic7Bqm5VPylWphZdzN7x8w2m9mi4Ix+77JB\nZvaBmW0zs3+b2dVBe0PgFaC5mZUFr+Zm9piZ/TJu+0IzWx03X2pmN5vZYmC7mWUE2z1nZhvMbIWZ\nDT1CrPv2v3ffZjbSzNab2VozO8/M+prZh2b2uZndErftHWb2rJn9PsjnfTPrFLf8VDMrDn4Ofzez\nHx5w3AfNbLaZbQeuBAYAI4PcXwrWG2Vm/wr2/w8zOz9uHwPN7C9m9hsz2xTk2idueSMze9TM1gTL\nn49b1s/MSoLY3jGzjgn/B0vaUfGX0JlZDjAL+CXQCBgBPGdmjYNV1gP9gOOAQcB4M+vi7tuBPsCa\nr/FJ4hLgB8DxwB7gJWARkAOcCdxgZmcnuK+mQL1g2zHAVOBSoCvwn8DPzOybceufCzwT5Pq/wPNm\nVsfM6gRxvAacCFwPPGlm347b9kfAXcCxwOPAk8A9Qe7nBOv8KzhuFnAn8ISZNYvbRzdgOZAN3AP8\nzswsWDYdaAC0D2IYD2BmnYFHgKuBE4CHgBfN7JgEf0aSZlT8pao9H5w5bo47q7wUmO3us919j7v/\nCVgA9AVw91nu/i+PeYNYcfzPSsbxW3df5e47gdOAxu7+c3f/0t3/TayAX5zgvnYBd7n7LuBpYkV1\nortvc/e/A/8AOsWtv9Ddnw3Wv4/YG0f34JUJjA3ieB14mdgb1V4vuPvbwc/pi0MF4+7PuPuaYJ3f\nAx8B34lb5WN3n+ruu4FpQDOgSfAG0Qe4xt03ufuu4OcNMAR4yN3fc/fd7j4N+L8gZqmB0rY/VFLW\nee7+5wPaTgL+n5mdE9dWB5gLEHRL3A6cQuyEpAGwpJJxrDrg+M3NbHNcW23grQT39VlQSAF2Bv+u\ni1u+k1hRP+jY7r4n6JJqvneZu++JW/djYp8oDhX3IZnZ5cBPgdygKZPYG9Jen8Ydf0dw0p9J7JPI\n5+6+6RC7PQm4wsyuj2urGxe31DAq/lIdVgHT3f2/D1wQdCs8B1xO7Kx3V/CJYW83xaFuR9tO7A1i\nr6aHWCd+u1XACnc/+esE/zW03DthZrWAFsDe7qqWZlYr7g2gFfBh3LYH5rvfvJmdROxTy5nAu+6+\n28xK+OrndSSrgEZmdry7bz7Esrvc/a4E9iM1gLp9pDo8AZxjZmebWW0zqxdcSG1B7OzyGGADUB58\nCjgrbtt1wAlmlhXXVgL0DS5eNgVuqOD484FtwUXg+kEMHczstCrLcH9dzax/cKfRDcS6T+YB7wE7\niF3ArRNc9D6HWFfS4awDWsfNNyT2hrABYhfLgQ6JBOXua4ldQH/AzL4RxHBGsHgqcI2ZdbOYhmb2\nAzM7NsGcJc2o+Evo3H0VsYugtxArWquAm4Ba7r4NGAr8AdhE7ILni3HbLgOeAv4dXEdoTuyi5SKg\nlNj1gd9XcPzdxC4o5wMrgI3Aw8QumIbhBeC/iOVzGdA/6F//klix7xPE8ABweZDj4fwOaLf3Goq7\n/wMYB7xL7I0hD3j7KGK7jNg1jGXELr
TfAODuC4D/Bv4niPufwMCj2K+kGX3JS6QKmdkdQBt3vzTZ\nsYgcic78RUQiSMVfRCSC1O0jIhJBOvMXEYmglL3P//jjj/c2bdokO4xK2759Ow0bNkx2GJWmPFKL\n8kgtqZTHwoULN7p744rWS9ni36RJExYsWJDsMCqtuLiYwsLCZIdRacojtSiP1JJKeZjZx4msp24f\nEZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGRCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGR\nCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGRCFLxFxGJIBV/EZGQDR48mBNPPJEOHTrs\na7vpppto27YtHTt25Pzzz2fz5s0A/OlPf6Jr167k5eXRtWtXXn/99VBiCq34m9lQM/vAzD4xsy1m\nVhK8xoR1TBGRVDRw4EBeffXV/dp69erF0qVLWbx4Maeccgp33303ANnZ2bz00kssWbKEadOmcdll\nl4USk7l7ODs2Wwb0BNoAI9y939Fs36p1G6910cRQYqtOw/PKGbckI9lhVJrySC3KI7UcKo/SsT/Y\nf760lH79+rF06dKDtp85cybPPvssTz755H7t7s4JJ5zA2rVrOeaYYxKKxcwWuntBReuFcuZvZpOB\n1sArQOcwjiEiUlM88sgj9OnT56D25557ji5duiRc+I9GKMXf3a8B1gBFwN+A/zCzxWb2ipm1D+OY\nIiLp6K677iIjI4MBAwbs1/73v/+dm2++mYceeiiU44bZ7VMKFABfAnvcvczM+gIT3f3kw2wzBBgC\nkJ3duOuYCVNDia06NakP63YmO4rKUx6pRXmklkPlkZeTtd/8p59+yujRo3n00Uf3tb366qu89NJL\njBs3jnr16u1r37BhAz/96U8ZOXIkeXl5RxVLUVFRQt0+oXe2ufvWuOnZZvaAmWW7+8ZDrDsFmAKx\nPv+a2heYjpRHalEeqeWQff4DCvefLy2lYcOGFBbG2l999VVefPFF3njjDRo3brxvvc2bN9OjRw8m\nTpxI//79wwva3UN5AaVANtCUrz5hfAdYuXf+SK9TTjnFa4K5c+cmO4QqoTxSi/JILRXlcfHFF3vT\npk09IyPDc3Jy/OGHH/Zvfetb3qJFC+/UqZN36tTJr776and3/8UvfuENGjTY196pUydft25dwrEA\nCzyBGl0db7kXAteaWTmwE7g4CFBEJBKeeuqpg9quvPLKQ6572223cdttt4UdUnjF391zg8n/CV4i\nIpIi9A1fEZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGRCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8RcR\niSAVfxGRCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8ReRtDB48GBOPPFEOnTosK/t888/p1evXpx8\n8sn06tWLTZs2ATB//nzy8/PJz8+nU6dOzJw5M1lhp6wwB3AfClwL/ANoDnQBbnX33ySyfavWbbzW\nRRNDia061eQxStOR8kgtR8qjdOwP9pt/8803yczM5PLLL2fp0qUAjBw5kkaNGjFq1CjGjh3Lpk2b\n+PWvf82OHTuoW7cuGRkZrF27lk6dOrFmzRoyMsL5mRUXF+8bmzfZzCyhAdzDPPO/DuhF7A1gKJBQ\n0RcROZQzzjiDRo0a7df2wgsvcMUVVwBwxRVX8PzzzwPQoEGDfYX+iy++wMyqN9g0EErxN7PJQGvg\nFWCAu/8V2BXGsUQkutatW0ezZs0AaNq0KevWrdu37L333qN9+/bk5eUxefLk0M7601UoPw13v8bM\negNF7r4x0e3MbAgwBCA7uzFj8srDCK9aNakf+2ib7pRHaolCHsXFxQe1ffrpp2zfvn3fsvLy8v3W\n2717937z999/Px9//DG33HILDRs2pG7dulUY/VfKysoOGW8qS6m3QnefAkyBWJ9/Te/TTCfKI7VE\nIY
/SAYUHt5WW0rBhw3396zk5OXz729+mWbNmrF27lubNmx+y733atGk0atSIgoIKu8K/llTq809U\nyv721K9Tm+UHXPBJR8XFxYf8JU43yiO1KI+YH/7wh0ybNo1Ro0Yxbdo0zj33XABWrFhBy5YtycjI\n4OOPP2bZsmXk5uZWTdA1RMoWfxGReJdccgnFxcVs3LiRFi1acOeddzJq1Cguuugifve733HSSSfx\nhz/8AYC//OUvjB07ljp16lCrVi0eeOABsrOzk5xBagm9+JtZU2ABcBywx8xuANq5+9awjy0iNcdT\nTz11yPY5c+Yc1HbZZZdx2WWXhR1SWgut+Lt7btxsi7COIyIiR0/f8BURiSAVfxGRCFLxFxGJIBV/\nEZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGRCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGR\nCFLxF5GkmDhxIoMGDaJ9+/ZMmDABgJKSErp3705+fj4FBQXMnz8/yVHWXKE9z9/MhgLXAu8DU4EJ\nQB1go7v3qGj7nbt2kztqVljhVZvheeUMVB4pQ3kkT2ncsKxLly5l6tSpPPjgg/Ts2ZPevXvTr18/\nRo4cye23306fPn2YPXs2I0eOTLuB0dNFmCN5XQf0BMqAd4De7r7SzE4M8ZgikgY++OADunXrRr16\n9cjIyKBHjx7MmDEDM2Pr1tggf1u2bKF58+ZJjrTmCqXbx8wmA62BV4AfAzPcfSWAu68P45gikj46\ndOjAW2+9xZYtW9ixYwezZ89m1apVTJgwgZtuuomWLVsyYsQI7r777mSHWmOZu4ezY7NSoAC4jVh3\nT3vgWGCiuz9+mG2GAEMAsrMbdx0zYWoosVWnJvVh3c5kR1F5yiO1pGMeeTlZ+83PmjWLmTNn0rBh\nQ3Jzc6lTpw579uyhU6dO9OjRg7lz5/Lyyy8zbty4JEWcuLKyMjIzM5MdBgBFRUUL3b2govWqo/jf\nEfx7JlAfeBf4gbt/eKTtW7Vu47UumhhKbNVpeF4545aE2btWPZRHaknHPOL7/PcqLi6msLCQW265\nhRYtWjB69Gg2b96MmeHuZGVl7esGSmV780gFZpZQ8a+O357VwGfuvh3YbmZvAp2AIxb/+nVqs/wQ\nvyzppri4mNIBhckOo9KUR2qpCXmsXx/rAV65ciUzZsxg3rx5TJo0iTfeeIPCwkJef/11Tj755CRH\nWXNVR/F/AfgfM8sA6gLdgPHVcFwRSWEXXHABq1atIisri/vvv5/jjz+eqVOnMmzYMMrLy6lXrx5T\npkxJdpg1VujF390/MLNXgcXAHuBhd18a9nFFJLW99dZbB3WXnH766SxcuDB5QUVIaMXf3XPjpu8F\n7g3rWCIicnT0DV8RkQhS8RcRiSAVfxGRCFLxFxGJIBV/EZEIUvEXEYkgFX8RkQhS8RcRiSAVfxGR\nCFLxFxGJIBV/EZEIOurib2bfMLOOYQQjIiLVI6Hib2bFZnacmTUiGJDdzO4LNzQREQlLomf+We6+\nFegPPO7u3YgNzi4iImko0eKfYWbNgIuAl0OMR0Sqwfjx42nfvj0dOnTgkksu4YsvvuCOO+4gJyeH\n/Px88vPzmT17drLDlBAlWvx/DvwR+Je7/9XMWgMfHWkDMxtqZh+Y2XYzKwleS81sd9B9JCJJ8Mkn\nn/Db3/6WBQsWsHTpUnbv3s3TTz8NwI033khJSQklJSX07ds3yZFKmBIazMXdnwGeiZv/N3BBBZtd\nB/R099V7G8zsHOBGd/+8omPu3LWb3FGzEgkvpQ3PK2eg8kgZUc3jwMHTy8vL2blzJ3Xq1GHHjh00\nb96c0tLSKo5SUlmiF3xPMbM5ZrY0mO9oZrcdYf3JQGvgFTO7MW7RJcBTlQlYRConJyeHESNG0KpV\nK5o1a0ZWVhZnnXUWAJMmTaJjx44MHjyYTZs2JTlSCZO5e8Urmb0B
3AQ85O6dg7al7t7hCNuUAgXu\nvjGYbwCsBtoc7szfzIYAQwCysxt3HTNh6tFlk4Ka1Id1O5MdReUpj9RytHnk5WTtm962bRu33347\nY8aMITMzkzvuuIMePXrQtWtXsrKyMDMeeeQRPvvsM26++eYQov9KWVkZmZmZoR6jOqRSHkVFRQvd\nvaCi9RIdw7eBu883s/i28qOM6Rzg7SN1+bj7FGAKQKvWbXzcktDHlw/d8LxylEfqiGoepQMK900/\n88wzdO7cmfPOOw+ANWvWMG/ePPr3779vndatW9OvX7/9BlcPw4EDuKerdMwj0d+ejWb2LcABzOxC\nYO1RHutijqLLp36d2iw/oJ8yHRUXF+/3h5eulEdqqUwerVq1Yt68eezYsYP69eszZ84cCgoKWLt2\nLc2aNQNg5syZdOhw2A/2UgMkWvx/TOyMvK2ZfQKsAAYkehAzywJ6AJcedYQiUqW6devGhRdeSJcu\nXcjIyKBz584MGTKEq666ipKSEsyM3NxcHnrooWSHKiGqsPibWS1iffc9zawhUMvdtx3lcc4HXnP3\n7V8nSBGpWnfeeSd33nnnfm3Tp09PUjSSDBXe7ePue4CRwfT2RAu/u+fuvdjr7o+5+8WVilRERKpM\nol/y+rOZjTCzlmbWaO8r1MhERCQ0ifb5/1fw74/j2pzYvfwiIpJmEv2G7zfDDkRERKpPQsXfzC4/\nVLu7P1614YiISHVItNvntLjpesCZxJ7rr+IvIpKGEu32uT5+3syOB54OJSIREQnd1x3Ddzug6wAi\nImkq0T7/lwge7UDsDaMdcY94FhGR9JJon/9v4qbLgY/jn9MvIiLpJdFun77u/kbwetvdV5vZr0ON\nTEREQpNo8e91iLY+VRmIiIhUnyN2+5jZtcSGY2xtZovjFh0LvB1mYCIiEp6K+vz/F3gFuBsYFde+\nLZFxeEVEJDUdsfi7+xZgC7GxdzGzE4l9ySvTzDLdfWX4IYqISFVLdAD3c8zsI2KDuLwBlBL7RCBS\no23evJkLL7yQtm3bcuqpp/Luu+/ys5/9jI4dO5Kfn89ZZ53FmjVrkh2myFFL9ILvL4HuwIfBQ97O\nBOYdaQMzG2pmH5jZk8H8aWZWHgwBKZIWhg0bRu/evVm2bBmLFi3i1FNP5aabbmLx4sWUlJTQr18/\nfv7znyc7TJGjluh9/rvc/TMzq2Vmtdx9rplNqGCb64CewW2htYFfA68lGtjOXbvJHTUr0dVT1vC8\ncgYqj5RxpDxKDxgzesuWLbz55ps89thjANStW5e6devut8727dsxs1BiFQlTosV/s5llAm8BT5rZ\nemKPeDgkM5tM7Fn/r5jZI8S+Hfwc+z8gTiSlrVixgsaNGzNo0CAWLVpE165dmThxIg0bNuTWW2/l\n8ccfJysri7lz5yY7VJGjZu5e8UqxsXt3EusmGgBkAU+6+2dH2KYUKACOIXbXUBHwCPCyuz97mG2G\nAEMAsrMbdx0zYerR5JKSmtSHdTuTHUXlRSGPvJys/eaXL1/Oddddx6RJk2jXrh2TJk2iYcOGDB48\neN86Tz75JF9++SWDBg0KM+yDlJWVkZmZWa3HDIPyqHpFRUUL3b2govUSKv4AZnYScLK7/9nMGgC1\njzSeb1zxfxAY5+7zzOwxjlD847Vq3cZrXTQxodhS2fC8csYtSfQDVuqKQh4Hdvt8+umndO/endLS\nUgDeeustxo4dy6xZX3UbrVy5kr59+7J06dLQYj6U4uJiCgsLq/WYYVAeVc/MEir+id7t89/As8BD\nQVMO8HyCsRQATwdvBhcCD5jZeQluK5I0TZs2pWXLlixfvhyAOXPm0K5dOz766KN967zwwgu0bds2\nWSGKfG2Jnsr9GPgO8B6Au38U3PNfofghIOPO/Ct846hfpzbLDzgTS0fFxcWUDihMdhiVFtU8Jk2a\nxIABA/jyyy9p3bo1jz76KFdd
dRXLly+nVq1anHTSSUyePDm8gEVCkmjx/z93/3LvXQ1mlsFXj3gW\nqbHy8/NZsGDBfm3PPfdckqIRqTqJFv83zOwWoL6Z9SJ2G+dLR9rA3XMP0TbwaAMUEZGql+iXvEYB\nG4AlwNXAbOC2sIISEZFwVfRUz1buvtLd9wBTg5eIiKS5is78912YNTN1dIqI1BAVFf/47623DjMQ\nERGpPhUVfz/MtIiIpLGK7vbpZGZbiX0CqB9ME8y7ux8XanQiIhKKigZzqV1dgYiISPVJ9FZPERGp\nQVT8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVEIkjFX0QkglT8Je3l5uaSl5dHfn4+BQX7j143btw4\nzIyNGzcmKTqR1BTaoKxmNhS4Fngf+AzoC+wABrr7+2EdV6Jp7ty5ZGdn79e2atUqXnvtNVq1apWk\nqERSV5gjcl8H9AQ6AtcDJwPdiA3o3q2ijXfu2k3uqFkVrZbyhueVM1B5VKkDB1o/nBtvvJF77rmH\nc889N+SIRNJPKN0+ZjaZ2FNAXwFmAo97zDzgeDNrFsZxJZrMjJ49e9K1a1emTJkCxAZWz8nJoVOn\nTkmOTiQ1mXs4D+s0s1KgAHgMGOvufwna5wA3u/uCQ2wzBBgCkJ3duOuYCek/dkyT+rBuZ7KjqLxU\nyiMvJ2u/+Q0bNtC4cWM2bdrEiBEjGDp0KJMnT+bee+8lMzOTiy++mIceeoisrCzKysrIzMxMUuRV\nR3mkllTKo6ioaKG7F1S0XpjdPkfN3acAUwBatW7j45akVHhfy/C8cpRH1SodUHjYZYsWLWLr1q18\n9tln/OQnPwFg48aNXH/99cyfP59ly5ZRWHj47dNFcXGx8kgh6ZhHddzt8wnQMm6+RdAmUmnbt29n\n27Zt+6Zfe+01TjvtNNavX09paSmlpaW0aNGC999/n6ZNmyY5WpHUUR2nci8CPzGzp4ld6N3i7msr\n2qh+ndosT/DCXiorLi4+4plqukjVPNatW8f5558PQHl5OT/60Y/o3bt3kqMSSX3VUfxnE7vN85/E\nbvUcVA3HlIho3bo1ixYtOuI6paWl1ROMSBoJrfi7e27c7I/DOo6IiBw9fcNXRCSCVPxFRCJIxV9E\nJIJU/EVEIkjFX0QkglT8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVEIkjFX0QkglT8RUQiSMVfRCSC\nVPxFRCJIxV9Szu7du+ncuTP9+vUDYkMzfve73yUvL49zzjmHrVu3JjlCkfQXWvE3s6Fm9oGZuZkt\nNrMlZvaOmXUK65hSM0ycOJFTTz113/xVV13F2LFjWbJkCeeffz733ntvEqMTqRnCHMnrOqAn0Ar4\nwN03mVkfYgO0d6to4527dpM7alaI4VWP4XnlDFQeh1V6wFCdq1evZtasWdx6663cd999AHz44Yec\nccYZAPTq1Yuzzz6bX/ziF1Uei0iUhHLmb2aTgdbAK0A3d98ULJpHbAB3kUO64YYbuOeee6hV66tf\nzfbt2/PCCy8A8Mwzz7Bq1apkhSdSY5i7h7Njs1KgwN03xrWNANq6+1WH2WYIMAQgO7tx1zETpoYS\nW3VqUh/W7Ux2FJUXVh55OVn7pt99913mzZvHjTfeSElJCb///e+5++67WblyJZMmTWLLli1873vf\nY8aMGfveDI5WWVkZmZmZVRV+0iiP1JJKeRQVFS1094KK1qu24m9mRcADwOnu/llF27dq3cZrXTQx\nlNiq0/C8csYtCbN3rXqElUd8t8/o0aOZPn06GRkZfPHFF2zdupX+/fvzxBNP7Fvnww8/5NJLL2X+\n/Plf63jFxcUUFhZWNuykUx6pJZXyMLOEin+13O1jZh2Bh4FzEyn8Ek133303q1evprS0lKeffprv\nf//7PPHEE6xfvx6APXv28Mtf/pJrrrkmyZGKpL/QT0nNrBUwA7jM3T9MdLv6dWqz/ICLgemouL
iY\n0gGFyQ6j0pKZx1NPPcX9998PQP/+/Rk0aFBS4hCpSaqjP2IMcALwgJkBlCfykUSirbCwcN/H6GHD\nhjFs2LDkBiRSw4RW/N09N5i8KniJiEiK0Dd8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVEIkjFX0Qk\nglT8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVEIkjFX0QkglT8RUQiSMVfRCSCVPzT2KpVqygqKqJd\nu3a0b9+eiRNjw16WlJTQvXt38vPzKSgo+NpDHopIzRXa8/zNbChwLdAUWAXsAcqBG9z9L2EdN0oy\nMjIYN24cXbp0Ydu2bXTt2pVevXoxcuRIbr/9dvr06cPs2bMZOXIkxcXFyQ5XRFJImCN5XQf0BDYD\n293dg7F8/wC0rWjjnbt2kztqVojhVY/heeUMrMI84gc8b9asGc2aNQPg2GOP5dRTT+WTTz7BzNi6\ndSsAW7ZsoXnz5lV2fBGpGUIp/mY2GWgNvAI84u7jg0UNAQ/jmFFXWlrK3/72N7p168aECRM4++yz\nGTFiBHv27OGdd95JdngikmJC6fN392uANUCRu483s/PNbBkwCxgcxjGjrKysjAsuuIAJEyZw3HHH\n8eCDDzJ+/HhWrVrF+PHjufLKK5MdooikGHMP50TczEqBAnffGNd2BjDG3XseZpshwBCA7OzGXcdM\nmBpKbNWpSX1Yt7Pq9peXk7XffHl5OaNHj+a0007joosuAqBfv3689NJLmBnuTr9+/Zg1q3JdT2Vl\nZWRmZlZqH6lAeaQW5VH1ioqKFrp7QUXrhdnnfxB3f9PMWptZdvybQtzyKcAUgFat2/i4JdUaXiiG\n55VTlXmUDijcN+3uXHHFFXzve99jwoQJ+9pbtmyJmVFYWMicOXNo27YthYWFB+/sKBQXF1d6H6lA\neaQW5ZE8oVdXM2sD/Cu44NsFOAb4rKLt6tepzfK4i5vpqri4eL+CXZXefvttpk+fTl5eHvn5+QD8\n6le/YurUqQwbNozy8nLq1avHlClTQjm+iKSv6ji1vgC43Mx2ATuB//Kw+poi5vTTT+dwP8qFCxdW\nczQikk5CK/7unhtM/jp4iYhIitA3fEVEIkjFX0QkglT8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVE\nIkjFX0QkglT8RUQiSMVfRCSCVPxFRCJIxV9EJIJU/EVEIkjFX0QkglT8RUQiSMVfRCSCVPxFRCJI\nxV9EJIJU/EVEIshSdSx1M9sGLE92HFUgG9iY7CCqgPJILcojtaRSHie5e+OKVgptAPcqsNzdC5Id\nRGWZ2QJeb2/1AAAFsUlEQVTlkTqUR2pRHsmjbh8RkQhS8RcRiaBULv5Tkh1AFVEeqUV5pBblkSQp\ne8FXRETCk8pn/iIiEhIVfxGRCErJ4m9mvc1suZn908xGJTueRJhZSzOba2b/MLO/m9mwoL2Rmf3J\nzD4K/v1GsmNNhJnVNrO/mdnLwXza5WFmx5vZs2a2zMw+MLPvpmkeNwa/U0vN7Ckzq5cOeZjZI2a2\n3syWxrUdNm4zGx38zS83s7OTE/XBDpPHvcHv1WIzm2lmx8ctS8k8DpRyxd/MagP3A32AdsAlZtYu\nuVElpBwY7u7tgO7Aj4O4RwFz3P1kYE4wnw6GAR/EzadjHhOBV929LdCJWD5plYeZ5QBDgQJ37wDU\nBi4mPfJ4DOh9QNsh4w7+Vi4G2gfbPBDUglTwGAfn8Segg7t3BD4ERkPK57GflCv+wHeAf7r7v939\nS+Bp4Nwkx1Qhd1/r7u8H09uIFZocYrFPC1abBpyXnAgTZ2YtgB8AD8c1p1UeZpYFnAH8DsDdv3T3\nzaRZHoEMoL6ZZQANgDWkQR7u/ibw+QHNh4v7XOBpd/8/d18B/JNYLUi6Q+Xh7q+5e3kwOw9oEUyn\nbB4HSsXinwOsiptfHbSlDTPLBToD7wFN3H1tsOhToEmSwj
oaE4CRwJ64tnTL45vABuDRoPvqYTNr\nSJrl4e6fAL8BVgJrgS3u/hpplkecw8Wdzn/3g4FXgum0ySMVi39aM7NM4DngBnffGr/MY/fVpvS9\ntWbWD1jv7gsPt0465EHsbLkL8KC7dwa2c0DXSDrkEfSJn0vszaw50NDMLo1fJx3yOJR0jTuemd1K\nrMv3yWTHcrRSsfh/ArSMm28RtKU8M6tDrPA/6e4zguZ1ZtYsWN4MWJ+s+BL0PeCHZlZKrMvt+2b2\nBOmXx2pgtbu/F8w/S+zNIN3y6AmscPcN7r4LmAH8B+mXx16Hizvt/u7NbCDQDxjgX31hKm3ySMXi\n/1fgZDP7ppnVJXbx5MUkx1QhMzNi/csfuPt9cYteBK4Ipq8AXqju2I6Gu4929xbunkvsZ/+6u19K\n+uXxKbDKzL4dNJ0J/IM0y4NYd093M2sQ/I6dSex6Urrlsdfh4n4RuNjMjjGzbwInA/OTEF9CzKw3\nsa7RH7r7jrhF6ZOHu6fcC+hL7Ar6v4Bbkx1PgjGfTuwj7GKgJHj1BU4gdlfDR8CfgUbJjvUocioE\nXg6m0y4PIB9YEPyfPA98I03zuBNYBiwFpgPHpEMewFPErlPsIvZJ7MojxQ3cGvzNLwf6JDv+CvL4\nJ7G+/b1/65NTPY8DX3q8g4hIBKVit4+IiIRMxV9EJIJU/EVEIkjFX0QkglT8RUQiKJUHcBcJhZnt\nBpbENZ3n7qVJCkckKXSrp0SOmZW5e2Y1Hi/Dv3oImEhKULePyAHMrJmZvWlmJcEz9P8zaO9tZu+b\n2SIzmxO0NTKz54Pnus8zs45B+x1mNt3M3gamB+Mj3Gtmfw3WvTqJKYqo20ciqb6ZlQTTK9z9/AOW\n/wj4o7vfFTyLvYGZNQamAme4+wozaxSseyfwN3c/z8y+DzxO7JvFEBuP4nR332lmQ4g9kfM0MzsG\neNvMXvPYY39Fqp2Kv0TRTnfPP8LyvwKPBA/qe97dS8ysEHhzb7F2973Pdz8duCBoe93MTjCz44Jl\nL7r7zmD6LKCjmV0YzGcRe+6Lir8khYq/yAHc/U0zO4PYgDaPmdl9wKavsavtcdMGXO/uf6yKGEUq\nS33+Igcws5OAde4+ldhoZl2IjdZ0RvCkRuK6fd4CBgRthcBGP2Ach8AfgWuDTxOY2SnB4DIiSaEz\nf5GDFQI3mdkuoAy43N03BP32M8ysFrHn0PcC7iDWRbQY2MFXjys+0MNALvB+8GjmDaTg0IsSHbrV\nU0QkgtTtIyISQSr+IiIRpOIvIhJBKv4iIhGk4i8iEkEq/iIiEaTiLyISQf8fnmI5XmYE85oAAAAA\nSUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x146c4cf8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from xgboost import plot_importance\n",
    "from matplotlib import pyplot\n",
    "\n",
    "# Fit a separate classifier on the full dataset for the importance plot, so the\n",
    "# `model` evaluated on the held-out split above is not overwritten by a refit\n",
    "# that has seen the test rows.\n",
    "importance_model = XGBClassifier()\n",
    "importance_model.fit(X, Y)\n",
    "\n",
    "plot_importance(importance_model)\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 调参"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "如何调参呢，下面是三个超参数的一般实践最佳值，可以先将它们设定为这个范围，然后画出 learning curves，再调节参数找到最佳模型：\n",
    "\n",
    "learning_rate ＝ 0.1 或更小，越小就需要加入越多的弱学习器；  \n",
    "max_depth（树的深度）＝ 2～8；  \n",
    "subsample ＝ 训练集的 30%～80%；\n",
    "\n",
    "接下来我们用 GridSearchCV 来进行调参会更方便一些：\n",
    "可以调的超参数组合有：  \n",
    "树的个数和大小 (n_estimators and max_depth).  \n",
    "学习率和树的个数 (learning_rate and n_estimators).  \n",
    "行列的 subsampling rates (subsample, colsample_bytree and colsample_bylevel).  \n",
    "下面以学习率为例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "设定要调节的 learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]\n",
    "和原代码相比就是在 model 后面加上 grid search 这几行："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate learning rates, compared with 10-fold stratified cross-validation\n",
    "# and scored by negative log loss.\n",
    "learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]\n",
    "param_grid = {\"learning_rate\": learning_rate}\n",
    "\n",
    "model = XGBClassifier()\n",
    "kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=model,\n",
    "    param_grid=param_grid,\n",
    "    scoring=\"neg_log_loss\",\n",
    "    cv=kfold,\n",
    "    n_jobs=-1,\n",
    ")\n",
    "grid_result = grid_search.fit(X, Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.483013 using {'learning_rate': 0.1}\n"
     ]
    }
   ],
   "source": [
    "# Unpack the winning mean cross-validation score and the parameters that produced it.\n",
    "best_score, best_params = grid_result.best_score_, grid_result.best_params_\n",
    "print(\"Best: %f using %s\" % (best_score, best_params))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "打印出每个学习率对应的分数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.689650 (0.000242) with: {'learning_rate': 0.0001}\n",
      "-0.661274 (0.001954) with: {'learning_rate': 0.001}\n",
      "-0.530747 (0.022961) with: {'learning_rate': 0.01}\n",
      "-0.483013 (0.060755) with: {'learning_rate': 0.1}\n",
      "-0.515440 (0.068974) with: {'learning_rate': 0.2}\n",
      "-0.557315 (0.081738) with: {'learning_rate': 0.3}\n"
     ]
    }
   ],
   "source": [
    "# One line per candidate: mean test score, its standard deviation across folds,\n",
    "# and the parameter setting that was evaluated.\n",
    "cv_results = grid_result.cv_results_\n",
    "means, stds, params = (\n",
    "    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')\n",
    ")\n",
    "for mean, stdev, param in zip(means, stds, params):\n",
    "    print(\"%f (%f) with: %r\" % (mean, stdev, param))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
